################################################################################-
# Replication File for Wratil, Wäckerle and Proksch: Government Rhetoric and the 
# Representation of Public Opinion in International Negotiations
#
# This script runs the analysis presented in Table I8 in Appendix I.5.
#
# Additionally, the script produces the following graphs and tables:
# Table I8
################################################################################-

library(quanteda)     #version 3.2.1
library(tidyverse)    #version 1.3.2
library(tidytext)     #version 0.3.3
library(lmtest)       #version 0.9-39
library(multiwayvcov) #version 1.2.3
library(stargazer)    #version 5.2.3

# Load the document-feature matrix; the file is expected to provide `data_dfm`.
load("generated_data/dfm_for_stm.RData")
data_dfm_main <- data_dfm

# Reduce the document variables to the columns used below, then drop every
# document that has a missing value in any of them.
docvars(data_dfm_main) <- select(
  data.frame(docvars(data_dfm_main)),
  gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
  part_of_speech, text_copy, text_original, Actor,
  gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
  unemployment_scaled, inflation_scaled, north_south,
  Negotiation_Stage, Council_Config_final
)
data_dfm_main <- dfm_subset(
  data_dfm_main,
  complete.cases(docvars(data_dfm_main))
)

# Document variables of the retained documents, one row per document.
data.sum <- docvars(data_dfm_main)

# Load the model object; the file is expected to provide
# `stm_out_40_main_model`.
load(file="generated_data/stm_out_40_main_model.RData")

# Long-format gamma matrix of the model, with documents labelled by the row
# names of the subsetted dfm.
td_gamma <- tidy(
  stm_out_40_main_model,
  matrix = "gamma",
  document_names = rownames(data_dfm_main)
)

# Seed the random number generator once, before the bootstraps below.
set.seed(1234)

#### Topic 12 ####
# Keep the gamma values of topic 12 and append the document variables
# column-wise. bind_cols() pairs rows by position, so td_gamma and data.sum
# must list the documents in the same order.
td_gamma_12 <- td_gamma %>% filter(topic==12) %>% rename("gamma_12" = "gamma")
mod_data_12 <- bind_cols(td_gamma_12,data.sum)

# Run OLS
m1 <- lm(gamma_12 ~ gov_eu_supporter*image_lag6m_scaled+
           part_of_speech+
           gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
           unemployment_scaled+inflation_scaled+north_south+
           Negotiation_Stage+Council_Config_final,
         data=mod_data_12)

# Cluster by Actor
# The bootstrapped cluster fails sometimes, so it is retried. The retries are
# capped: a failure that recurs on every attempt stops the script and reports
# the last failure instead of looping forever with the error hidden by try().
mod_data_12$Actor_num <- mod_data_12$Actor %>% as.factor %>% as.numeric()

max_cluster_attempts <- 50
cluster_attempts <- 0
working_cluster <- "no"
while(working_cluster == "no"){
  cluster_attempts <- cluster_attempts + 1
  res_12 <- try(cluster.boot(m1, ~Actor_num,R=2000), silent = TRUE)
  if(is.array(res_12)){
    working_cluster <- "yes"
    print("Worked!")
  }else if(cluster_attempts >= max_cluster_attempts){
    stop("cluster.boot() did not succeed for topic 12 after ",
         max_cluster_attempts, " attempts. Last result: ",
         paste(as.character(res_12), collapse = " "),
         call. = FALSE)
  }else{
    print("Try again...")
  }
}

# Conventional OLS summary, and coefficient tests that use the bootstrapped
# covariance matrix res_12.
sum.mod.12 <-summary(m1)
sum.mod.12.cluster <- coeftest(m1, res_12)

#### Topic 13 ####
# Keep the gamma values of topic 13 and append the document variables
# column-wise. bind_cols() pairs rows by position, so td_gamma and data.sum
# must list the documents in the same order.
td_gamma_13 <- td_gamma %>% filter(topic==13) %>% rename("gamma_13" = "gamma")
mod_data_13 <- bind_cols(td_gamma_13,data.sum)

# Run OLS
m2 <- lm(gamma_13 ~ gov_eu_supporter*image_lag6m_scaled+
           part_of_speech+
           gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
           unemployment_scaled+inflation_scaled+north_south+
           Negotiation_Stage+Council_Config_final,
         data=mod_data_13)

# Cluster by Actor
# The bootstrapped cluster fails sometimes, so it is retried. The retries are
# capped: a failure that recurs on every attempt stops the script and reports
# the last failure instead of looping forever with the error hidden by try().
mod_data_13$Actor_num <- mod_data_13$Actor %>% as.factor %>% as.numeric()

max_cluster_attempts <- 50
cluster_attempts <- 0
working_cluster <- "no"
while(working_cluster == "no"){
  cluster_attempts <- cluster_attempts + 1
  res_13 <- try(cluster.boot(m2, ~Actor_num,R=2000), silent = TRUE)
  if(is.array(res_13)){
    working_cluster <- "yes"
    print("Worked!")
  }else if(cluster_attempts >= max_cluster_attempts){
    stop("cluster.boot() did not succeed for topic 13 after ",
         max_cluster_attempts, " attempts. Last result: ",
         paste(as.character(res_13), collapse = " "),
         call. = FALSE)
  }else{
    print("Try again...")
  }
}

# Conventional OLS summary, and coefficient tests that use the bootstrapped
# covariance matrix res_13.
sum.mod.13 <-summary(m2)
sum.mod.13.cluster <- coeftest(m2, res_13)

#### Topic 22 ####
# Keep the gamma values of topic 22 and append the document variables
# column-wise. bind_cols() pairs rows by position, so td_gamma and data.sum
# must list the documents in the same order.
td_gamma_22 <- td_gamma %>% filter(topic==22) %>% rename("gamma_22" = "gamma")
mod_data_22 <- bind_cols(td_gamma_22,data.sum)

# Run OLS
m3 <- lm(gamma_22 ~ gov_eu_supporter*image_lag6m_scaled+
           part_of_speech+
           gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
           unemployment_scaled+inflation_scaled+north_south+
           Negotiation_Stage+Council_Config_final,
         data=mod_data_22)

# Cluster by Actor
# The bootstrapped cluster fails sometimes, so it is retried. The retries are
# capped: a failure that recurs on every attempt stops the script and reports
# the last failure instead of looping forever with the error hidden by try().
mod_data_22$Actor_num <- mod_data_22$Actor %>% as.factor %>% as.numeric()

max_cluster_attempts <- 50
cluster_attempts <- 0
working_cluster <- "no"
while(working_cluster == "no"){
  cluster_attempts <- cluster_attempts + 1
  res_22 <- try(cluster.boot(m3, ~Actor_num,R=2000), silent = TRUE)
  if(is.array(res_22)){
    working_cluster <- "yes"
    print("Worked!")
  }else if(cluster_attempts >= max_cluster_attempts){
    stop("cluster.boot() did not succeed for topic 22 after ",
         max_cluster_attempts, " attempts. Last result: ",
         paste(as.character(res_22), collapse = " "),
         call. = FALSE)
  }else{
    print("Try again...")
  }
}

# Conventional OLS summary, and coefficient tests that use the bootstrapped
# covariance matrix res_22.
sum.mod.22 <-summary(m3)
sum.mod.22.cluster <- coeftest(m3, res_22)

#### Topic 32 ####
# Keep the gamma values of topic 32 and append the document variables
# column-wise. bind_cols() pairs rows by position, so td_gamma and data.sum
# must list the documents in the same order.
td_gamma_32 <- td_gamma %>% filter(topic==32) %>% rename("gamma_32" = "gamma")
mod_data_32 <- bind_cols(td_gamma_32,data.sum)

# Run OLS
m4 <- lm(gamma_32 ~ gov_eu_supporter*image_lag6m_scaled+
           part_of_speech+
           gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
           unemployment_scaled+inflation_scaled+north_south+
           Negotiation_Stage+Council_Config_final,
         data=mod_data_32)

# Cluster by Actor
# The bootstrapped cluster fails sometimes, so it is retried. The retries are
# capped: a failure that recurs on every attempt stops the script and reports
# the last failure instead of looping forever with the error hidden by try().
mod_data_32$Actor_num <- mod_data_32$Actor %>% as.factor %>% as.numeric()

max_cluster_attempts <- 50
cluster_attempts <- 0
working_cluster <- "no"
while(working_cluster == "no"){
  cluster_attempts <- cluster_attempts + 1
  res_32 <- try(cluster.boot(m4, ~Actor_num,R=2000), silent = TRUE)
  if(is.array(res_32)){
    working_cluster <- "yes"
    print("Worked!")
  }else if(cluster_attempts >= max_cluster_attempts){
    stop("cluster.boot() did not succeed for topic 32 after ",
         max_cluster_attempts, " attempts. Last result: ",
         paste(as.character(res_32), collapse = " "),
         call. = FALSE)
  }else{
    print("Try again...")
  }
}

# Conventional OLS summary, and coefficient tests that use the bootstrapped
# covariance matrix res_32.
sum.mod.32 <-summary(m4)
sum.mod.32.cluster <- coeftest(m4, res_32)

#### Topic 38 ####
# Keep the gamma values of topic 38 and append the document variables
# column-wise. bind_cols() pairs rows by position, so td_gamma and data.sum
# must list the documents in the same order.
td_gamma_38 <- td_gamma %>% filter(topic==38) %>% rename("gamma_38" = "gamma")
mod_data_38 <- bind_cols(td_gamma_38,data.sum)

# Run OLS
m5 <- lm(gamma_38 ~ gov_eu_supporter*image_lag6m_scaled+
           part_of_speech+
           gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
           unemployment_scaled+inflation_scaled+north_south+
           Negotiation_Stage+Council_Config_final,
         data=mod_data_38)

# Cluster by Actor
# The bootstrapped cluster fails sometimes, so it is retried. The retries are
# capped: a failure that recurs on every attempt stops the script and reports
# the last failure instead of looping forever with the error hidden by try().
mod_data_38$Actor_num <- mod_data_38$Actor %>% as.factor %>% as.numeric()

max_cluster_attempts <- 50
cluster_attempts <- 0
working_cluster <- "no"
while(working_cluster == "no"){
  cluster_attempts <- cluster_attempts + 1
  res_38 <- try(cluster.boot(m5, ~Actor_num,R=2000), silent = TRUE)
  if(is.array(res_38)){
    working_cluster <- "yes"
    print("Worked!")
  }else if(cluster_attempts >= max_cluster_attempts){
    stop("cluster.boot() did not succeed for topic 38 after ",
         max_cluster_attempts, " attempts. Last result: ",
         paste(as.character(res_38), collapse = " "),
         call. = FALSE)
  }else{
    print("Try again...")
  }
}

# Conventional OLS summary, and coefficient tests that use the bootstrapped
# covariance matrix res_38.
sum.mod.38 <-summary(m5)
sum.mod.38.cluster <- coeftest(m5, res_38)

#### Topic 40 ####
# Keep the gamma values of topic 40 and append the document variables
# column-wise. bind_cols() pairs rows by position, so td_gamma and data.sum
# must list the documents in the same order.
td_gamma_40 <- td_gamma %>% filter(topic==40) %>% rename("gamma_40" = "gamma")
mod_data_40 <- bind_cols(td_gamma_40,data.sum)

# Run OLS
m6 <- lm(gamma_40 ~ gov_eu_supporter*image_lag6m_scaled+
           part_of_speech+
           gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
           unemployment_scaled+inflation_scaled+north_south+
           Negotiation_Stage+Council_Config_final,
         data=mod_data_40)

# Cluster by Actor
# The bootstrapped cluster fails sometimes, so it is retried. The retries are
# capped: a failure that recurs on every attempt stops the script and reports
# the last failure instead of looping forever with the error hidden by try().
mod_data_40$Actor_num <- mod_data_40$Actor %>% as.factor %>% as.numeric()

max_cluster_attempts <- 50
cluster_attempts <- 0
working_cluster <- "no"
while(working_cluster == "no"){
  cluster_attempts <- cluster_attempts + 1
  res_40 <- try(cluster.boot(m6, ~Actor_num,R=2000), silent = TRUE)
  if(is.array(res_40)){
    working_cluster <- "yes"
    print("Worked!")
  }else if(cluster_attempts >= max_cluster_attempts){
    stop("cluster.boot() did not succeed for topic 40 after ",
         max_cluster_attempts, " attempts. Last result: ",
         paste(as.character(res_40), collapse = " "),
         call. = FALSE)
  }else{
    print("Try again...")
  }
}

# Conventional OLS summary, and coefficient tests that use the bootstrapped
# covariance matrix res_40.
sum.mod.40 <-summary(m6)
sum.mod.40.cluster <- coeftest(m6, res_40)

# This is Table I8
# Every model enters twice in a row: the first column is paired with the
# standard errors from summary(), the second with those from the clustered
# coeftest(). The label vectors are generated instead of typed out.
stargazer(
  m1, m1,
  m2, m2,
  m3, m3,
  m4, m4,
  m5, m5,
  m6, m6,
  se = list(
    sum.mod.12$coefficients[,2], sum.mod.12.cluster[,2],
    sum.mod.13$coefficients[,2], sum.mod.13.cluster[,2],
    sum.mod.22$coefficients[,2], sum.mod.22.cluster[,2],
    sum.mod.32$coefficients[,2], sum.mod.32.cluster[,2],
    sum.mod.38$coefficients[,2], sum.mod.38.cluster[,2],
    sum.mod.40$coefficients[,2], sum.mod.40.cluster[,2]
  ),
  #type="text",
  align = TRUE,
  # One label per dependent variable, in model order.
  dep.var.labels = paste("Topic Prevalence Topic", c(12, 13, 22, 32, 38, 40)),
  # Alternating column headers, one pair per model.
  column.labels = rep(c("Standard OLS", "Clustered OLS"), times = 6),
  # Labels are listed with the intercept first and the interaction term last.
  covariate.labels = c(
    "Intercept",
    "Eurosceptic Government",
    "Public Image of the EU",
    "Middle Part of Speech",
    "End Part of Speech",
    "Government Left-Right position",
    "Net receipts from EU budget",
    "Budget issue",
    "Unanimity Required",
    "Unemployment Rate",
    "Inflation Rate",
    "Northern Europe",
    "Southern Europe",
    "Negotiation stage: Initial Presentation",
    "Negotiation stage: Mixed Negotiations",
    "Negotiation stage: Policy Debates",
    "Council configuration: Ecofin",
    "Council configuration: EPSCO",
    "Council configuration: ENV",
    "Council configuration: JHA",
    "Eurosceptic Government x Public Image of the EU"
  ),
  omit.stat = c("f", "ll", "rsq", "adj.rsq", "ser"),
  intercept.bottom = FALSE
)

